Hypotheses: I hypothesize that different CpG loci will be associated with maternal prenatal smoking in African ancestry groups than in European and Hispanic ancestry groups. I hypothesize that the direction of associations between CpGs and prenatal maternal smoking will stay consistent between ages nine and fifteen but that the magnitude of associations will decline over time. Finally, I hypothesize that classification of prenatal maternal smoking status based upon DNA methylation information from salivary samples will be accurate and consistent at ages nine and fifteen.
pdqc_all<-readRDS(paste0(datadir, 'OGData/', "pd_qc.rds"))
pdqc<-pdqc_all %>% filter(probe_fail_10==0 & sex==predicted_sex)
#What FF ids have methylation data?
methy_ids<-pdqc$id
#create slide variable fo pdqc
pdqc$slide <- gsub('_.*', '', pdqc$MethID)
pdqc$array<-gsub('.*_', '', pdqc$MethID)
#ids with 9visit, 15visit or both visit
visit9ids<-pdqc%>%filter(childteen=='C')%>%pull(idnum)%>%unique()
visit15ids<-pdqc%>%filter(childteen=='T')%>%pull(idnum)%>%unique()
bothvisit_ids<-visit9ids[visit9ids %in% visit15ids]
visit9only_ids<-visit9ids[!(visit9ids %in%bothvisit_ids)]
visit15only_ids<-visit15ids[!(visit15ids %in%bothvisit_ids)]#create flag in FF for having methylation data
#and flag for having 2 visits of methylation data
FF_labeled<-FF_labeled %>%
mutate(MethylData=ifelse(id %in% methy_ids, "Analysis subset", "Not in analysis subset"),
TwoVisitData=ifelse(id%in%bothvisit_ids,"Analysis subset - both visits", "Not in both visit analysis subset"))
table(FF_labeled$MethylData)##
## Analysis subset Not in analysis subset
## 880 4018
##
## Analysis subset - both visits Not in both visit analysis subset
## 805 4093
if((length(visit15only_ids)+length(visit9only_ids)+length(bothvisit_ids)!=length(FF_labeled %>% filter(MethylData=="Analysis subset")%>%pull(id)))){print("WARNING ID LENGTHS DO NOT ADD UP")}
if(length(bothvisit_ids)!=length(FF_labeled %>% filter(TwoVisitData=="Analysis subset - both visits")%>%pull(id))){print("WARNING ID LENGTHS DO NOT ADD UP")}#technical replicates will be duplicate id + childteen variables from pdqc
pdqc<-pdqc %>% mutate(newID=paste(idnum, childteen, sep='_'))
techrep_ids<- pdqc %>% select(newID) %>% group_by(newID)%>%filter(n()>1)
techreps<-pdqc %>% filter(newID %in% techrep_ids$newID)%>%arrange(idnum,childteen)
hiPctPF<-techreps %>% group_by(newID) %>%filter(probe_fail_pct==max(probe_fail_pct))%>%pull(MethID)
pdqc_clean<-pdqc %>% filter(!(MethID %in% hiPctPF) & childteen!='M')
myMethIDs<-pdqc_clean %>% pull(MethID)Cell type proportions estimated using epidDISH package and the EpiFibIC reference (generic for epithelial tissues).
betaqc<-readRDS(file=paste0(datadir, 'OGData/', "noob_filtered.rds"))
aprioriCG<-c("cg05575921", "cg04180046", "cg05549655", "cg14179389")
globalmethy<-colMeans(betaqc, na.rm=T)
globalmethdf<-as.data.frame(cbind("MethID"=names(globalmethy), "globalmethylation"=globalmethy))
aprioriCGdf<-as.data.frame(t(betaqc[aprioriCG, ]))
aprioriCGdf$MethID = colnames(betaqc)
#cell type proportions
celltypes<-epidish(beta.m = betaqc, ref.m = centEpiFibIC.m, method = "RPC")
estF_FF<-as.data.frame(celltypes$estF)
estF_FF$MethID = row.names(estF_FF)#read in cpgs from PMC4833289
cpgs<-read.csv(paste0(datadir, 'PMC4833289_SuppFile2CpGs.csv'), stringsAsFactors = F, header=F, skip=2)
#make headers
header1<-scan(paste0(datadir, 'PMC4833289_SuppFile2CpGs.csv'), nlines=1, what=character(), sep=',')
header1[7:26]<-c(rep("SSnewborn", 5), rep("SSnewbornCT", 5), rep("SSolder", 5), rep("anynewborn", 5))
header2<-paste(header1, scan(paste0(datadir, 'PMC4833289_SuppFile2CpGs.csv'), nlines=1, skip=1, what=character(), sep=','), sep='_')
names(cpgs)<-header2
#four meta-EWASes: SS_newborn (sustained smoking in newborns); SS_newbornCT (sustained smoking in newborns controlling for cell type) SS_older (sustained smoking in older children); any_newborn (any smoking in newborns)
#pivot_longer to get single column of coefs and a column for the analysis type
#and group into a list by analysis type
cpgs<-cpgs %>% pivot_longer(cols = c(matches("_Coef|_SE|_P|_FDR|_Direction")), names_to = c("set", ".value"), names_pattern="(.+)_(.+)")%>%rename_with(~gsub('_', '', .x))%>%group_split(set)
cpgs<-cpgs%>%setNames(lapply(cpgs, function(x){x$set %>% unique()}))
#make scores - no transform, zscore and centermean
#first, pull script
source(file.path(codedir, 'UsefulCode', 'makePolyEpiScores.R'))
scores=makePolyEpiScore(m=betaqc, b=cpgs)## [1] "transforming the methylation matrix:none"
## [1] "transforming the methylation matrix:none"
## [1] "transforming the methylation matrix:none"
## [1] "transforming the methylation matrix:none"
## [1] "transforming the methylation matrix:zscore"
## [1] "transforming the methylation matrix:zscore"
## [1] "transforming the methylation matrix:zscore"
## [1] "transforming the methylation matrix:zscore"
## [1] "transforming the methylation matrix:meancenter"
## [1] "transforming the methylation matrix:meancenter"
## [1] "transforming the methylation matrix:meancenter"
## [1] "transforming the methylation matrix:meancenter"
polyscores=list('notransform'=scores, 'zscore'=scores_z, 'center'=scores_center)
polyscores<-lapply(polyscores, function(x) x%>% mutate(MethID=colnames(betaqc)))
save(polyscores, file = file.path(datadir, 'CreatedData', 'polymethylationscores.RDS'))
polyscores_wide<-lapply(seq_along(polyscores), function(i) polyscores[[i]] %>% setNames(c(paste0(colnames(polyscores[[i]])[1:4], '_', names(polyscores)[i]), 'MethID')))%>%reduce(left_join, by='MethID')clocks2<-clocks %>% mutate(idnum=gsub('a', '', idnum))%>%filter(MethID %in% pdqc$MethID) # currently clocks are missing 3 individuals who were in johns pipeline but not jonahs
methyldata<-left_join(left_join(left_join(left_join(left_join(pdqc_clean %>% select(MethID, slide, array,childteen, idnum), globalmethdf), clocks2), estF_FF), aprioriCGdf), polyscores_wide)## Joining, by = "MethID"
## Joining, by = c("MethID", "idnum")
## Joining, by = "MethID"
## Joining, by = "MethID"
## Joining, by = "MethID"
methyldata[, c("globalmethylation", aprioriCG)]<-lapply(methyldata[, c("globalmethylation", aprioriCG)],function(x) as.numeric(as.character(x)))
#lapply(methyldata, class)
methyldata_wide <- methyldata %>% select(-MethID)%>% pivot_wider(., names_from=childteen, values_from=any_of(colnames(methyldata)[4:length(colnames(methyldata))]))#weirdID<-methyldata %>% filter(is.na(globalmethylation))%>%pull(MethID)
#globalmethy[weirdID] #not actually missing global methylation data
#clocks %>% filter(MethID%in%weirdID) # #idnum from not the same as in
#pdqc_clean %>% filter(MethID==weirdID) #this is just a typo in the idnum from data entry, see format of other #idnums
#pdqc_clean %>% pull(idnum) %>% head()
#solution: when creating methyldata, first trim alphabet from clocks$idnum that contain alphabetic characters --> implemented
#also misssing clocks for 3 sex outlier samples that weren't in jonahs' pipeline:
table(is.na(methyldata$horvath13))
weirdIDs<-methyldata %>% filter(is.na(horvath13))%>%pull(MethID)
pdqc_all %>% filter(MethID %in% weirdID) %>% xtabs(~predicted_sex_outlier, data=.)FF_labeled<-FF_labeled %>%mutate(smkPreg_binary=ifelse(m1g4%in% c("1 2+pk/d","2 1<pk<2","3 <1pk/d" ), "Yes", ifelse(m1g4%in%c("-2 Don't know", "-3 Missing", "-9 Not in wave"), NA, ifelse(m1g4== "4 None", "No", ifelse(m1g4=="Missing", "Missing", "FLAG")))))
#xtabs(~m1g4+smkPreg_binary, data=FF_labeled)Ancestry is based on csv files of ancestry-specific PCS created by Erin (per Jonah meeting Oct 6 2020). If an ID was in {african/european/hispanic}, then the individual is labeled as such.
# read in PCs
euroPC<-read.csv("/scratch/bfoxman_root/bfoxman/blostein/FragileFamilies/Files/OGData/pcs/european.csv")
afrPC<-read.csv("/scratch/bfoxman_root/bfoxman/blostein/FragileFamilies/Files/OGData/pcs/african.csv")
hisPC<-read.csv("/scratch/bfoxman_root/bfoxman/blostein/FragileFamilies/Files/OGData/pcs/hispanic.csv")
allPC<-read.table("/scratch/bfoxman_root/bfoxman/blostein/FragileFamilies/Files/OGData/pcs/global_pcs.txt", col.names=c("X", "idnum", paste("PC", 1:20, sep='')))
#individuals in each ancestry specific pc group can be labeled as that ancestry for categorical ancestry variable
FF_labeled<-FF_labeled %>% mutate(ancestry=case_when(MethylData=="Analysis subset" & id %in% hisPC$idnum~"Hispanic ancestry", MethylData=="Analysis subset" & id %in%afrPC$idnum~"African ancestry", MethylData=="Analysis subset" & id %in% euroPC$idnum~"European ancestry", MethylData=="Analysis subset" & !(id %in%c(hisPC$idnum, afrPC$idnum, euroPC$idnum)) & (id %in% allPC$idnum)~"Other ancestry", MethylData=="Analysis subset" & !(id %in%c(hisPC$idnum, afrPC$idnum, euroPC$idnum)) & !(id %in% allPC$idnum)~"Missing PC data", MethylData!="Analysis subset"~"Not in analysis subset"))
#check ancestry variable
#table(FF_labeled$ancestry)
#with(FF_labeled, xtabs(~ancestry+cm1ethrace))
MissingPCdata<-FF_labeled %>% filter(ancestry=="Missing PC data")%>%select(id)
save(MissingPCdata, file = "/scratch/bfoxman_root/bfoxman/blostein/FragileFamilies/Files/CreatedData/MissingPC.Rdata")
#add PC data to FF_labeled
FF_labeled<-left_join(FF_labeled, allPC %>% mutate(id=as.character(idnum)) %>% select(id, PC1, PC2)%>%rename_at(vars(-id), function(x) paste0('global_', x, sep='')))## Joining, by = "id"
localPC<-rbind(euroPC, afrPC, hisPC)
FF_labeled<-left_join(FF_labeled, localPC %>% mutate(id=as.character(idnum)) %>% select(id, PC1, PC2)%>% rename_at(vars(-id), function(x) paste0('local_', x, sep='')))## Joining, by = "id"
There were 47 individuals without PC data. None of these individuals had data in the global PC file, so I assume they are actually missing genetic data (for whatever reason, possibly QC).
Recoded frequency of prenatal alcohol drinking and drug use into Yes/no binary variables for drinking and drug use during pregnancy.
#table(FF_labeled$m1g2) # during preg, alchohol freq
#table(FF_labeled$m1g3) # during preg, drugs freq
with(FF_labeled, table(m1g2, m1g3))## m1g3
## m1g2 1 E day 2 Svl/wk 3 Svl/mn 4 <1X/mn 5 Never Missing
## 1 E day 1 2 0 1 4 0
## 2 Svl/wk 1 7 2 2 15 0
## 3 Svl/mn 2 5 16 12 48 0
## 4 <1X/mn 6 1 14 58 324 0
## 5 Never 17 17 27 78 4219 6
## Missing 0 0 2 0 6 5
FF_labeled<-FF_labeled %>% mutate_at(c("m1g2", "m1g3"), .funs=funs(YesNoPreg=ifelse(.=="5 Never", "No", ifelse(. =="Missing", "Missing", "Yes"))))
with(FF_labeled, table(m1g2, m1g2_YesNoPreg))## m1g2_YesNoPreg
## m1g2 Missing No Yes
## 1 E day 0 0 8
## 2 Svl/wk 0 0 27
## 3 Svl/mn 0 0 83
## 4 <1X/mn 0 0 403
## 5 Never 0 4364 0
## Missing 13 0 0
## m1g3_YesNoPreg
## m1g3 Missing No Yes
## 1 E day 0 0 27
## 2 Svl/wk 0 0 32
## 3 Svl/mn 0 0 61
## 4 <1X/mn 0 0 151
## 5 Never 0 4616 0
## Missing 11 0 0
Maternal postnatal smoking at different survey points is fairly correlated, and alluvial plots of changes over time show broad consistency, so I decided to create a single smoking variable for maternal smoking at age 1 and age 5 - if mothers were smoking at either age 1 or age 5 then “Maternal smoking at age 1 or 5”, if not smoking at age 1 and age 5 then “No maternal smoking at age 1 and age 5” and then if missing data at one age and no at the other, then “Missing” (and if missing at both ages, “Missing”). Also created a similar dose variable capturing maternal packs per day that could be used instead. Decided to use just age 1 and age 5 because at these two time points the mother was asked (as opposed to PCG, also the dataset of Fragile Families metadata is missing PCG questions from age 3 about smoking) and they both preceed the actual saliva collection for both saliva collection age points. Then use a smoking dose variable from the mother or PCG interview at wave of saliva collection as an additional second hand smoke exposure variable.
#Baseline (1) -> Year 1/Age 1 (2) ->Year 3/Age3 (3) -> Year 5 (4) ->Year 9 (5) -> year 15 (6)
#Do you smoke in past month (how many packs per day)
#Mom ->m2j5 (m2j5a) -> -> m4j18 (m4j19) /p4 -> m5g17 (m5g18) / n5f17 (n5f18)
#PCG p3a23a(p3a23b) n5f17 (n5f18)-> p6h76 (p6h77)
#seem to be missing p3 variables (PCG survey year 3)
#number of people in household who smoke
#p3a23->p5h15b->p6h78
#hours child around smoke
#p3a21 -> p4a24->p5h15c
#with(FF_labeled, table(m2j5, m4j18))
#with(FF_labeled, table(m4j18, m5g17))
#with(FF_labeled, table(n5f17, m5g17))
#with(FF_labeled, table(p6h76, m5g17))
plot_grid(FF_labeled %>% select(m2j5, m4j18, m5g17, n5f17, p6h74) %>% mutate_all(funs(as.numeric(.))) %>%ggcorr(label=T)+ggtitle("Correlation between Yes/No smoking (mother) \n at years 1, 5, & 9 & PCG at years 9 & 15")
,FF_labeled %>% select(m2j5a, m4j19, m5g18, n5f18, p6h77) %>% mutate_all(funs(as.numeric(.))) %>%ggcorr(label=T)+ggtitle("Correlation between packs per day smoking (mother) \n at years 1, 5, 9 & PCG at years 9 and 15"), nrow=2)yesnosmkalluv<-to_lodes_form(as.data.frame(FF_labeled %>% select(m2j5, m4j18, m5g17, p6h74) %>% mutate(m5g17=stringr::str_to_title(m5g17))%>% group_by(m2j5, m4j18, m5g17, p6h74) %>% count()), axes=1:4, id='Cohort')
#ggplot(yesnosmkalluv, aes(x=x, stratum = stratum, alluvium=Cohort, fill=stratum, label=stratum, y=n))+geom_flow(stat="alluvium", lode.guidance="backfront", color='darkgrey')+geom_stratum()+scale_x_discrete(labels=c("Age 1", "Age 5", "Age 9"))
smkdose<-to_lodes_form(as.data.frame(FF_labeled %>% select(m2j5a, m4j19, m5g18, p6h77)%>% mutate_at(vars(c("m2j5a", "m4j19", "m5g18")), funs(factor(substring(., 1, 1), labels=levels(FF_labeled$m2j5a)))) %>% group_by(m2j5a, m4j19, m5g18, p6h77) %>% count()), axes=1:4, id='Cohort')
#ggplot(smkdose, aes(x=x, stratum = stratum, alluvium=Cohort, fill=stratum, label=stratum, y=n))+geom_flow()+geom_stratum()+scale_x_discrete(labels=c("Age 1", "Age 5", "Age 9"))
plot_grid(ggplot(yesnosmkalluv, aes(x=x, stratum = stratum, alluvium=Cohort, fill=stratum, label=stratum, y=n))+geom_flow()+geom_stratum()+scale_x_discrete(labels=c("Age 1", "Age 5", "Age 9", "Age 15"))
+ggtitle("Alluvial plot of Yes/No smoking variables from age 1 to age 9 (mother) and age 15 (PCG)"), ggplot(smkdose %>% filter(!(stratum %in% c("-6 Skip", "Missing"))), aes(x=x, stratum = stratum, alluvium=Cohort, fill=stratum, label=stratum, y=n))+geom_flow()+geom_stratum()+scale_x_discrete(labels=c("Age 1", "Age 5", "Age 9", "Age 15"))+ggtitle("Alluvial plot of smoking per day among those smoking from age 1 to age 9 (mother) and 15 (PCG)"), nrow=2)#ggplot(FF_labeled, aes(x=p5h15b, y=p6h78))+geom_jitter()+stat_cor()+scale_x_continuous('# smoking in house age 9')+scale_y_continuous('# smoking in house age 15')+ggtitle("# individuals smoking in house correlation age 9 and age 15")
#Make any maternal postnatal smoking variable & dose categorization for maternal smoking at ages 1 & 5
FF_labeled<-FF_labeled %>% mutate(PostnatalMaternalSmokingAny = case_when(
m2j5 =="1 Yes" ~ "Maternal smoking at age 1 or age 5",
m4j18=="1 Yes" ~ "Maternal smoking at age 1 or age 5",
m2j5 == "Missing" & m4j18=="Missing" ~ "Missing",
m2j5 == "Missing" & m4j18=="2 No" ~ "Missing",
m2j5 == "2 No" & m4j18=="Missing" ~ "Missing",
m2j5 == "2 No" & m4j18=="2 No" ~ "No maternal smoking at age 1 and age 5")) %>%
mutate(PostnatalMaternalSmokingDose=case_when(
m2j5a %in% c("2 1pk/d", "3 1.5pk/d", "4 2pk/d", "5 >2pk/d") &
m4j19 %in% c("2 About pack/day", "3 Pack and half/day", "4 2 packs/day", "5
More 2 packs/day") ~ "Mom - pack/d or greater at both age 1 & 5",
m2j5a %in% c("2 1pk/d", "3 1.5pk/d", "4 2pk/d", "5 >2pk/d") &
!(m4j19 %in% c("2 About pack/day", "3 Pack and half/day", "4 2 packs/day", "5
More 2 packs/day")) ~ "Mom - pack/d or greater at either age 1 or 5",
!(m2j5a %in% c("2 1pk/d", "3 1.5pk/d", "4 2pk/d", "5 >2pk/d")) &
m4j19 %in% c("2 About pack/day", "3 Pack and half/day", "4 2 packs/day", "5
More 2 packs/day") ~ "Mom - pack/d or greater at either age 1 or 5",
m2j5a %in% c("1 <1/2 pk/d") | m4j19 %in% c("1 Less half pack/day")~"Mom - less than half pack/d at either age 1 or 5",
m2j5a %in% c("-6 Skip") & m4j19 %in% c("-6 Skip")~
"No maternal smoking at age 1 or 5",
m2j5a %in% c("Missing") & m4j19 %in% c("-6 Skip", "Missing")~"Missing",
m2j5a %in% c("Missing", "-6 Skip") & m4j19 %in% c("Missing")~"Missing"))
with(FF_labeled, table(PostnatalMaternalSmokingAny, m2j5, m4j18))## , , m4j18 = 1 Yes
##
## m2j5
## PostnatalMaternalSmokingAny 1 Yes 2 No Missing
## Maternal smoking at age 1 or age 5 868 305 81
## Missing 0 0 0
## No maternal smoking at age 1 and age 5 0 0 0
##
## , , m4j18 = 2 No
##
## m2j5
## PostnatalMaternalSmokingAny 1 Yes 2 No Missing
## Maternal smoking at age 1 or age 5 168 0 0
## Missing 0 0 183
## No maternal smoking at age 1 and age 5 0 2526 0
##
## , , m4j18 = Missing
##
## m2j5
## PostnatalMaternalSmokingAny 1 Yes 2 No Missing
## Maternal smoking at age 1 or age 5 123 0 0
## Missing 0 371 273
## No maternal smoking at age 1 and age 5 0 0 0
#with(FF_labeled, table(m2j5a, m4j19, PostnatalMaternalSmokingDose))
#with(FF_labeled, table(PostnatalMaternalSmokingAny, PostnatalMaternalSmokingDose))Maybe here best to have 1 variable capturing any maternal postnatal smoking at age 1 & 5 and then use in each crossectional model the dose of maternal postnatal smoking at current visit (and similarly for linear mixed effect model)?
varLabels2<-c(paste(colnames(FF_labeled)[1:66], varLabels), "Has any methylation data?", "Has two visits of methylation data", "Any maternal smoking during pregnancy", "Ancestry from PCA of child's genetic data", "PC 1 (global)", "PC 2 (global", "PC 1 (within ancestry category)", "PC 2 (within ancestry category",
"Any alcohol during pregnancy", "Any drugs during pregnancy", "Any postnatal smoking (age 1 & 5)", "Postnatal smoking dose at age 1 & 5")
FF_labeled<-set_label(FF_labeled, varLabels2)
myFF<-FF_labeled %>% filter(MethylData=="Analysis subset")%>%mutate(idnum=id)
myFF<-left_join(myFF, methyldata)## Joining, by = "idnum"
varLabels3<-c(varLabels2, "ID", "Methylation ID", "Slide", "Array (RC)", "Visit", "Global methylation", "horvath13 methylation clock", "horvath18 methylation clock", "pediatric methylation clock", "levine methylation clock", "Epithelial cell proportion", "Fibroid cell proportion", "Immune cell proportion", "cg05575291 methylation", "cg04180046 methylation", "cg05549655 methylation", "cg14179389 methylation", "PES: Any smoking, newborn cordblood", "PES: Sustained smoking, newborn cordblood", "PES: Sustained smoking, newborn cordblood (+CT control)", "PES: Sustained smoking, older children peripheral blood", "PES: Any smoking, newborn cordblood - zscore", "PES: Sustained smoking, newborn cordblood - zscore", "PES: Sustained smoking, newborn cordblood (+CT control) - zscore", "PES: Sustained smoking, older children peripheral blood - zscore", "PES: Any smoking, newborn cordblood - mean center", "PES: Sustained smoking, newborn cordblood - mean center", "PES: Sustained smoking, newborn cordblood (+CT control) - mean center", "PES: Sustained smoking, older children peripheral blood - mean center")
myFF<-set_label(myFF, varLabels3)Exclude children who reported smoking at age 9 and age 15. Interestingly, the children who report smoking at age 9 do not report smoking at age 15, and the parental perception of child smoking at age 9 does not match the children who say they have ever smoked a cigarrette. For now using the ‘ever smoked’ variable from age 15 for the exclusion, but there is also a ‘smoked in past month’ that we could use instead (lose fewer children).
## childteen
## k5f1l C T
## 1 yes 6 6
## 2 no 816 835
## Missing 7 15
with(myFF, table(p5q3cr, k5f1l)) # not the same kids whose parents say somewhat or very true that the child is using tobacco## k5f1l
## p5q3cr 1 yes 2 no Missing
## 1 not true 12 1608 14
## 2 somewhat or sometimes true 0 4 0
## 3 very true or often true 0 2 0
## Missing 0 37 8
## childteen
## k6d40 C T
## 1 Yes 41 40
## 2 No 779 809
## Missing 9 7
#with(myFF, summary(k6d41))
with(myFF, table(k6d42, childteen)) # of these only 13 say they've smoked in the past month ## childteen
## k6d42 C T
## -6 Skip 779 809
## 1 Never 28 28
## 2 Once or twice a week 10 10
## 3 3 to 5 days a week 0 0
## 4 6 to 7 days a week 3 2
## Missing 9 7
#with(myFF, summary(k6d43))
with(myFF, table(k5f1l, k6d40, childteen))# not the same kids at age 9 and 15## , , childteen = C
##
## k6d40
## k5f1l 1 Yes 2 No Missing
## 1 yes 1 5 0
## 2 no 40 769 7
## Missing 0 5 2
##
## , , childteen = T
##
## k6d40
## k5f1l 1 Yes 2 No Missing
## 1 yes 1 5 0
## 2 no 39 791 5
## Missing 0 13 2
myFF<-myFF %>% mutate(SmkAtVisitPastmonth=case_when(childteen=='C' & m5g17=='1 yes' & m5g18 %in% c("2 about a pack", "3 a pack and a half", "4 about 2 packs",
"5 more than two packs") ~ "Pack or more a day",
childteen=='C' & m5g17=='1 yes' & m5g18=="1 less than half a pack a day" ~ "Less than pack a day",
childteen=='C' & n5f17=="1 yes" & n5f18 =='2 about a pack' ~ "Pack or more a day",
childteen=='C' & n5f17=="1 yes" & n5f18 =="1 less than half a pack day" ~ "Less than pack a day",
childteen=='C' & m5g17=="1 yes" & m5g18 %in% c("-6 Skip", "Missing")~"Missing",
childteen == 'C' & n5f17%in%c("-7 N/A", "2 no") & m5g17=="2 no" ~ "No smoking",
childteen == 'C' & n5f17%in%c("2 no") & m5g17%in%c("2 no", "Missing") ~ "No smoking",
childteen == 'C' & n5f17 %in% c("Missing", "-7 N/A") & m5g17 =="Missing" ~ "Missing",
childteen=='T' & p6h76 %in% c("-6 Skip", "0 None") ~ "No smoking",
childteen=='T' & p6h77%in% c("1 5 or fewer cigarettes per day", "2 About half a pack a day, 10 cigarettes")~"Less than pack a day",
childteen=='T' & p6h77 %in% c("3 About a pack a day, 20 cigarettes", "4 More than 1 pack a day")~"Pack or more a day",
childteen=='T' & p6h77 == "Missing"~"Missing"))%>%
mutate(SmkAtVisitYesNo=case_when(SmkAtVisitPastmonth=="No smoking"~"No",
SmkAtVisitPastmonth %in% c("Less than pack a day", "Pack or more a day") ~ "Yes",
childteen=='T'& SmkAtVisitPastmonth=="Missing" & !(p6h76 %in% c("-6 Skip", "0 None", "Missing"))~"Yes",
childteen=='C' & SmkAtVisitPastmonth=="Missing" & m5g17=='1 yes'~"Yes",
childteen=='C' & SmkAtVisitPastmonth=="Missing" & m5g17!='1 yes'~"Missing",
childteen=='T'& SmkAtVisitPastmonth=="Missing" & p6h76 %in% c("-6 Skip", "0 None", "Missing")~"Missing"))
myFF %>% filter(childteen=='C') %>% xtabs(~SmkAtVisitPastmonth+SmkAtVisitYesNo, data=.)## SmkAtVisitYesNo
## SmkAtVisitPastmonth Missing No Yes
## Less than pack a day 0 0 193
## Missing 13 0 0
## No smoking 0 544 0
## Pack or more a day 0 0 78
## SmkAtVisitYesNo
## SmkAtVisitPastmonth Missing No Yes
## Less than pack a day 0 0 184
## Missing 3 0 2
## No smoking 0 628 0
## Pack or more a day 0 0 39
## [1] 3 112
## [1] 0 112
## [1] 1 112
## [1] 976 112
myFF <- myFF %>%
mutate(ChildAgeAtIn=case_when(childteen=='C'~ cm5b_age, childteen=='T'~cp6yagem))%>%
mutate(ChildAgeAtVisit=case_when(childteen=='C'~hv5_agem, childteen=='T'~hv6_yagem))%>%
mutate(ChildAgeComposite=case_when(childteen=='C' & !is.na(hv5_agem)~hv5_agem,
childteen=='C' & is.na(hv5_agem)~cm5b_age,
childteen=='T' & !is.na(hv6_yagem)~hv6_yagem,
childteen=='T' & is.na(hv6_yagem)~cp6yagem))
myFF %>% filter(childteen=='C')%>%group_by(ChildAgeAtVisit, ChildAgeComposite)%>%summarise(n=n())%>%filter(ChildAgeAtVisit!=ChildAgeComposite)## `summarise()` regrouping output by 'ChildAgeAtVisit' (override with `.groups` argument)
## # A tibble: 0 x 3
## # Groups: ChildAgeAtVisit [0]
## # … with 3 variables: ChildAgeAtVisit <dbl>, ChildAgeComposite <dbl>, n <int>
myFF %>% filter(childteen=='T')%>%group_by(ChildAgeAtVisit, ChildAgeComposite)%>%summarise(n=n())%>%filter(ChildAgeAtVisit!=ChildAgeComposite)## `summarise()` regrouping output by 'ChildAgeAtVisit' (override with `.groups` argument)
## # A tibble: 0 x 3
## # Groups: ChildAgeAtVisit [0]
## # … with 3 variables: ChildAgeAtVisit <dbl>, ChildAgeComposite <dbl>, n <int>
ggplot(myFF%>%filter(childteen=='C'), aes(cm5b_age, hv5_agem))+geom_point()+stat_cor()+ggtitle("Correlation between childs age at mom interview (9 yr) and childs age at home visit (9 yr)")## Warning: Removed 23 rows containing non-finite values (stat_cor).
## Warning: Removed 23 rows containing missing values (geom_point).
age5resid<-myFF %>% filter(childteen=='C')%>%mutate(age5_resid=cm5b_age-hv5_agem)%>%pull(age5_resid)
hist(age5resid)age6resid<-myFF %>% filter(childteen=='T')%>%mutate(age6_resid=cp6yagem-hv6_yagem)%>%pull(age6_resid)
ggplot(myFF%>%filter(childteen=='T'), aes(cp6yagem,hv6_yagem))+geom_point()+stat_cor()+ggtitle("Correlation between childs age at mom interview (15 yr) and childs age at home visit (15 yr)")## Warning: Removed 498 rows containing non-finite values (stat_cor).
## Warning: Removed 498 rows containing missing values (geom_point).
##
## 0 5
## 1088 1
##
## -21 -18 -15 -14 -13 -12 -11 -10 -9 -8 -7 -6 -5 -4 -3 -2
## 1 1 2 4 3 6 15 10 5 10 13 16 13 24 52 70
## -1 0 1 2 3 4 5 6 7 8 10 11 12 13 14 15
## 123 189 1424 1222 18 12 5 15 4 4 3 2 1 1 1 3
## 16 17 21
## 1 1 1
##
## FALSE TRUE
## 1682 3
varLabels4<-c(varLabels3, "Mother/PCG (9/15 yr) smoking in month prior to interview (pk/day)", "Mother/PCG (9/15 yr) smoking in month prior to interview (yes/no)", "Child's age at maternal/PCG (9/15 yr) interview (months)", "Child's age at home vist (9/15 years) (months)", "Constructed child's age at home visit (in months) (9/15 years)")
myFF<-set_label(myFF, varLabels4)I also found two variables for child’s age at home visit, but the variable for child’s age at home visit for 15 years has a large amount of missingness in it (missing n=498). The variable for child’s age at home visit for 9 year visit is better (missing n=9). The interview age variables correlate well with the home visit age variables. For the 9 year interview/home visit ages, there are 5 individuals with >12 month difference between the ages, for the 15 year visit, there are 0 (and actually, there is 0 difference for any of the individuals with nonmissing home visit ages). So one option would be to use the home visit ages for the 9 year visit and for individuals with home visit age data available for the 15 year visit, supplementing the 15 year visit age variable with the 15 year PCG interview age visit for those missing the home visit age. Alternatively we could use baby birthdate and saliva colleciton date to create an age variable. I have the baby DOB variable, but I don’t have/don’t know where a saliva sample collection date would be.
slide_count<-myFF %>% mutate(slide=as.factor(slide))%>% group_by(smkPreg_binary, slide, .drop=F)%>%count()%>%pivot_wider(id_cols = slide, names_from=smkPreg_binary, values_from=n)%>%arrange(Yes)%>%mutate(Proportion=Yes/(No+Yes))
hist_data<-slide_count %>% ggplot(aes(x=Proportion))+geom_histogram(binwidth = 0.1, fill='blue', alpha=0.5, breaks=seq(0, 1, 0.1))+ggtitle('Histogram proprotion smk (from data)')
hist_data_c<-slide_count %>% ggplot(aes(x=Yes))+geom_histogram( fill='blue', alpha=0.5, breaks=c(0:12))+scale_x_continuous(breaks=c(0:12))+ggtitle('Histogram # Yes smk (from data)')
kable(slide_count)%>%kable_styling()%>%scroll_box(width='100%', height='400px')| slide | No | Yes | Proportion |
|---|---|---|---|
| 200347970068 | 12 | 0 | 0.0000000 |
| 200394970127 | 12 | 0 | 0.0000000 |
| 200397540068 | 12 | 0 | 0.0000000 |
| 200490750005 | 9 | 0 | 0.0000000 |
| 200490750009 | 11 | 0 | 0.0000000 |
| 200490750011 | 11 | 0 | 0.0000000 |
| 200490750063 | 8 | 0 | 0.0000000 |
| 200490750108 | 12 | 0 | 0.0000000 |
| 200490750124 | 12 | 0 | 0.0000000 |
| 200556930100 | 11 | 0 | 0.0000000 |
| 200556930111 | 10 | 0 | 0.0000000 |
| 200770460118 | 9 | 0 | 0.0000000 |
| 200770460172 | 12 | 0 | 0.0000000 |
| 200770460173 | 10 | 0 | 0.0000000 |
| 200863810009 | 12 | 0 | 0.0000000 |
| 200863810070 | 12 | 0 | 0.0000000 |
| 200863810114 | 10 | 0 | 0.0000000 |
| 200866140081 | 9 | 0 | 0.0000000 |
| 200866140163 | 12 | 0 | 0.0000000 |
| 201502620070 | 11 | 0 | 0.0000000 |
| 201502620071 | 11 | 0 | 0.0000000 |
| 201561870093 | 4 | 0 | 0.0000000 |
| 201561870123 | 12 | 0 | 0.0000000 |
| 3999984076 | 12 | 0 | 0.0000000 |
| 3999984094 | 12 | 0 | 0.0000000 |
| 3999984111 | 12 | 0 | 0.0000000 |
| 3999997019 | 12 | 0 | 0.0000000 |
| 9979858077 | 12 | 0 | 0.0000000 |
| 9979858078 | 12 | 0 | 0.0000000 |
| 9979858102 | 10 | 0 | 0.0000000 |
| 9979858118 | 12 | 0 | 0.0000000 |
| 9979858128 | 12 | 0 | 0.0000000 |
| 9986360014 | 12 | 0 | 0.0000000 |
| 9986360015 | 12 | 0 | 0.0000000 |
| 200397860021 | 10 | 1 | 0.0909091 |
| 200397860035 | 10 | 1 | 0.0909091 |
| 200490750003 | 10 | 1 | 0.0909091 |
| 200863810112 | 7 | 1 | 0.1250000 |
| 201502620017 | 10 | 1 | 0.0909091 |
| 201561870100 | 7 | 1 | 0.1250000 |
| 3999932043 | 9 | 1 | 0.1000000 |
| 9829118090 | 9 | 1 | 0.1000000 |
| 9829118105 | 9 | 1 | 0.1000000 |
| 9986360019 | 11 | 1 | 0.0833333 |
| 200347970051 | 10 | 2 | 0.1666667 |
| 200347970066 | 10 | 2 | 0.1666667 |
| 200347970071 | 10 | 2 | 0.1666667 |
| 200394970128 | 10 | 2 | 0.1666667 |
| 200397540074 | 10 | 2 | 0.1666667 |
| 200397860024 | 10 | 2 | 0.1666667 |
| 200397860046 | 7 | 2 | 0.2222222 |
| 200397860066 | 9 | 2 | 0.1818182 |
| 200397870011 | 8 | 2 | 0.2000000 |
| 200400320071 | 10 | 2 | 0.1666667 |
| 200490750001 | 10 | 2 | 0.1666667 |
| 200490750006 | 7 | 2 | 0.2222222 |
| 200490750021 | 10 | 2 | 0.1666667 |
| 200490750053 | 9 | 2 | 0.1818182 |
| 200490750074 | 8 | 2 | 0.2000000 |
| 200490750079 | 5 | 2 | 0.2857143 |
| 200490750081 | 10 | 2 | 0.1666667 |
| 200490750106 | 7 | 2 | 0.2222222 |
| 200490750107 | 10 | 2 | 0.1666667 |
| 200490750148 | 10 | 2 | 0.1666667 |
| 200556930110 | 10 | 2 | 0.1666667 |
| 200556930121 | 9 | 2 | 0.1818182 |
| 200556930130 | 9 | 2 | 0.1818182 |
| 200770460063 | 7 | 2 | 0.2222222 |
| 200863810019 | 10 | 2 | 0.1666667 |
| 200863810029 | 10 | 2 | 0.1666667 |
| 200863810053 | 9 | 2 | 0.1818182 |
| 200866140164 | 10 | 2 | 0.1666667 |
| 200866140165 | 10 | 2 | 0.1666667 |
| 200866140214 | 10 | 2 | 0.1666667 |
| 201502620002 | 9 | 2 | 0.1818182 |
| 201502620003 | 9 | 2 | 0.1818182 |
| 201502620010 | 7 | 2 | 0.2222222 |
| 201502620024 | 8 | 2 | 0.2000000 |
| 201502620039 | 8 | 2 | 0.2000000 |
| 201502620073 | 10 | 2 | 0.1666667 |
| 201502620074 | 9 | 2 | 0.1818182 |
| 201502620078 | 10 | 2 | 0.1666667 |
| 201561870001 | 10 | 2 | 0.1666667 |
| 201561870002 | 10 | 2 | 0.1666667 |
| 201561870060 | 8 | 2 | 0.2000000 |
| 201561870068 | 8 | 2 | 0.2000000 |
| 201561870083 | 10 | 2 | 0.1666667 |
| 201561870090 | 10 | 2 | 0.1666667 |
| 201561870091 | 10 | 2 | 0.1666667 |
| 201561870092 | 10 | 2 | 0.1666667 |
| 201561870101 | 8 | 2 | 0.2000000 |
| 201561870126 | 6 | 2 | 0.2500000 |
| 201561870136 | 9 | 2 | 0.1818182 |
| 3999932048 | 9 | 2 | 0.1818182 |
| 3999932050 | 10 | 2 | 0.1666667 |
| 3999932051 | 10 | 2 | 0.1666667 |
| 3999984051 | 10 | 2 | 0.1666667 |
| 3999984077 | 10 | 2 | 0.1666667 |
| 3999984080 | 7 | 2 | 0.2222222 |
| 3999984088 | 9 | 2 | 0.1818182 |
| 3999984090 | 10 | 2 | 0.1666667 |
| 3999984108 | 10 | 2 | 0.1666667 |
| 9829118115 | 5 | 2 | 0.2857143 |
| 9829118137 | 9 | 2 | 0.1818182 |
| 9979858076 | 10 | 2 | 0.1666667 |
| 9979858105 | 10 | 2 | 0.1666667 |
| 9986360012 | 10 | 2 | 0.1666667 |
| 9986360033 | 9 | 2 | 0.1818182 |
| 200556930073 | 9 | 3 | 0.2500000 |
| 200556930095 | 9 | 3 | 0.2500000 |
| 200770460062 | 8 | 3 | 0.2727273 |
| 200863730056 | 9 | 3 | 0.2500000 |
| 201502620016 | 7 | 3 | 0.3000000 |
| 3999932006 | 5 | 3 | 0.3750000 |
| 3999932091 | 9 | 3 | 0.2500000 |
| 3999932092 | 6 | 3 | 0.3333333 |
| 200347970040 | 8 | 4 | 0.3333333 |
| 200347970050 | 6 | 4 | 0.4000000 |
| 200347970069 | 8 | 4 | 0.3333333 |
| 200397540092 | 8 | 4 | 0.3333333 |
| 200397860096 | 6 | 4 | 0.4000000 |
| 200490750077 | 8 | 4 | 0.3333333 |
| 200490750082 | 8 | 4 | 0.3333333 |
| 200490750090 | 8 | 4 | 0.3333333 |
| 200490750091 | 3 | 4 | 0.5714286 |
| 200490750093 | 8 | 4 | 0.3333333 |
| 200556930109 | 6 | 4 | 0.4000000 |
| 200863730043 | 8 | 4 | 0.3333333 |
| 200863730057 | 8 | 4 | 0.3333333 |
| 200866140028 | 6 | 4 | 0.4000000 |
| 200866140079 | 8 | 4 | 0.3333333 |
| 200866140238 | 8 | 4 | 0.3333333 |
| 3999984091 | 8 | 4 | 0.3333333 |
| 3999984112 | 8 | 4 | 0.3333333 |
| 3999997140 | 8 | 4 | 0.3333333 |
| 9979858131 | 8 | 4 | 0.3333333 |
| 9986360027 | 8 | 4 | 0.3333333 |
| 200490750095 | 7 | 5 | 0.4166667 |
| 201561870059 | 7 | 5 | 0.4166667 |
| 3999932036 | 6 | 5 | 0.4545455 |
| 200347970036 | 6 | 6 | 0.5000000 |
| 200397540069 | 6 | 6 | 0.5000000 |
| 200397860094 | 6 | 6 | 0.5000000 |
| 200490750080 | 2 | 6 | 0.7500000 |
| 200866140159 | 6 | 6 | 0.5000000 |
| 3999984067 | 6 | 6 | 0.5000000 |
| 9986360025 | 6 | 6 | 0.5000000 |
| 9986360046 | 6 | 6 | 0.5000000 |
| 200397540040 | 4 | 8 | 0.6666667 |
| 201561870003 | 4 | 8 | 0.6666667 |
| 3999984100 | 4 | 8 | 0.6666667 |
| 3999984114 | 4 | 8 | 0.6666667 |
#simulation
set.seed(seed=20210216)
n<-nrow(myFF)
yes<-table(myFF$smkPreg_binary)[2]; no<-table(myFF$smkPreg_binary)[1]
values<-data.frame('smk'=sample(c('No', 'Yes'), n, replace=T, prob=c(no/(no+yes), yes/(no+yes))))
values_slides<-values %>%
group_by((row_number()-1) %/% (n()/152)) %>%
nest()%>%unnest(cols=c(data))
colnames(values_slides)<-c('slide', 'smk')
slide_sim<-values_slides%>%mutate(slide=as.factor(slide)) %>% group_by(smk, slide, .drop=F)%>%count()%>%pivot_wider(id_cols = slide, names_from=smk, values_from=n)%>%arrange(Yes)%>%mutate(Proportion=Yes/(No+Yes))
hist_sim<-slide_sim%>% ggplot(aes(x=Proportion))+geom_histogram(binwidth = 0.1, fill='red', alpha=0.5, breaks=seq(0, 1, 0.1))+ggtitle('Histogram proprotion smk (from simulation)')
hist_sim_c<-slide_sim%>% ggplot(aes(x=Yes))+geom_histogram(fill='red', alpha=0.5, breaks=c(0:12))+scale_x_continuous(breaks=c(0:12))+ggtitle('Histogram # Yes smk (from simulation)')
hist_overlay<-slide_count %>% ggplot(aes(x=Proportion))+geom_histogram(fill='blue', binwidth = 0.1, alpha=0.5, breaks=seq(0, 1, 0.1))+geom_histogram(aes(x=slide_sim$Proportion), breaks=seq(0, 1, 0.1), fill='red', binwidth = 0.1, alpha=0.5)+xlim(0,1)
hist_overlay_c<-slide_count %>% ggplot(aes(x=Yes))+geom_histogram(fill='blue', alpha=0.5, breaks=c(0:12))+geom_histogram(aes(x=slide_sim$Yes), fill='red', alpha=0.5, breaks=c(0:12))+scale_x_continuous(breaks=c(0:12))
plot_grid(plot_grid(hist_data, hist_sim, nrow=2), hist_overlay, nrow=1)FF_labeled<-set_label(FF_labeled, varLabels)
methyl<-createTable(compareGroups(MethylData~smkPreg_binary+cm1inpov+cm1ethrace+cm1bsex+m1g2_YesNoPreg+m1g3_YesNoPreg+PostnatalMaternalSmokingAny+k5f1l+k6d40+f1g4, data=FF_labeled), show.all=T)
export2md(methyl, caption="Exclusion table of all Fragile Families vs those with any methylation data for main predictors and key demographic variables from first wave")| [ALL] | Analysis subset | Not in analysis subset | p.overall | |
|---|---|---|---|---|
| N=4898 | N=880 | N=4018 | ||
| Any maternal smoking during pregnancy: | 0.230 | |||
| Missing | 12 (0.24%) | 0 (0.00%) | 12 (0.30%) | |
| No | 3934 (80.3%) | 702 (79.8%) | 3232 (80.4%) | |
| Yes | 952 (19.4%) | 178 (20.2%) | 774 (19.3%) | |
| cm1inpov Constructed - Poverty ratio - mother’s household income/poverty threshold | 2.22 (2.41) | 2.23 (2.43) | 2.22 (2.41) | 0.981 |
| cm1ethrace Mother race (baseline own report): | . | |||
| 1 white, non-hispanic | 1030 (21.0%) | 174 (19.8%) | 856 (21.3%) | |
| 2 black, non-hispanic | 2326 (47.5%) | 485 (55.1%) | 1841 (45.8%) | |
| 3 hispanic | 1336 (27.3%) | 188 (21.4%) | 1148 (28.6%) | |
| 4 other | 194 (3.96%) | 31 (3.52%) | 163 (4.06%) | |
| Missing | 12 (0.24%) | 2 (0.23%) | 10 (0.25%) | |
| cm1bsex Constructed - Focal baby’s gender: | 0.348 | |||
| 1 Boy | 2568 (52.4%) | 444 (50.5%) | 2124 (52.9%) | |
| 2 Girl | 2329 (47.6%) | 436 (49.5%) | 1893 (47.1%) | |
| Missing | 1 (0.02%) | 0 (0.00%) | 1 (0.02%) | |
| Any alcohol during pregnancy: | 0.635 | |||
| Missing | 13 (0.27%) | 2 (0.23%) | 11 (0.27%) | |
| No | 4364 (89.1%) | 777 (88.3%) | 3587 (89.3%) | |
| Yes | 521 (10.6%) | 101 (11.5%) | 420 (10.5%) | |
| Any drugs during pregnancy: | 0.478 | |||
| Missing | 11 (0.22%) | 3 (0.34%) | 8 (0.20%) | |
| No | 4616 (94.2%) | 833 (94.7%) | 3783 (94.2%) | |
| Yes | 271 (5.53%) | 44 (5.00%) | 227 (5.65%) | |
| Any postnatal smoking (age 1 & 5): | <0.001 | |||
| Maternal smoking at age 1 or age 5 | 1545 (31.5%) | 333 (37.8%) | 1212 (30.2%) | |
| Missing | 827 (16.9%) | 47 (5.34%) | 780 (19.4%) | |
| No maternal smoking at age 1 and age 5 | 2526 (51.6%) | 500 (56.8%) | 2026 (50.4%) | |
| k5f1l F1L. Smoked a cigarette or used tobacco: | <0.001 | |||
| 1 yes | 26 (0.53%) | 6 (0.68%) | 20 (0.50%) | |
| 2 no | 3313 (67.6%) | 859 (97.6%) | 2454 (61.1%) | |
| Missing | 1559 (31.8%) | 15 (1.70%) | 1544 (38.4%) | |
| k6d40 ID: | <0.001 | |||
| 1 Yes | 185 (3.78%) | 41 (4.66%) | 144 (3.58%) | |
| 2 No | 3244 (66.2%) | 829 (94.2%) | 2415 (60.1%) | |
| Missing | 1469 (30.0%) | 10 (1.14%) | 1459 (36.3%) | |
| f1g4 In the past 3 months, how many cigarettes did you smoke a day?: | 0.029 | |||
| 1 2+pk/d | 62 (1.27%) | 11 (1.25%) | 51 (1.27%) | |
| 2 1<pk<2 | 364 (7.43%) | 73 (8.30%) | 291 (7.24%) | |
| 3 <1pk/d | 1067 (21.8%) | 204 (23.2%) | 863 (21.5%) | |
| 4 None | 2327 (47.5%) | 434 (49.3%) | 1893 (47.1%) | |
| Missing | 1078 (22.0%) | 158 (18.0%) | 920 (22.9%) |
methylCohort<-pdqc_all %>% filter(childteen!='M')
basemodelvar<-c("smkPreg_binary", "ancestry", "cm1bsex", "cm1inpov", "Epi", "IC", "ChildAgeComposite")
basemodeldata<-myFF %>% filter_at(all_of(basemodelvar), all_vars(!(. %in%c("Missing", "Missing PC data"))& !is.na(.)))
child_base<-basemodeldata%>%filter(childteen=='C')
teen_base<-basemodeldata%>%filter(childteen=='T')
prenatalexposurevar<-c(basemodelvar, "m1g2_YesNoPreg", "m1g3_YesNoPreg")
prenatalexdata<-basemodeldata %>%filter_at(all_of(prenatalexposurevar), all_vars(!(. %in% c("Missing"))))
child_prenatal<-prenatalexdata%>%filter(childteen=='C')
teen_prenatal<-prenatalexdata%>%filter(childteen=='T')
secondhandsmokevars<-c(prenatalexposurevar, "PostnatalMaternalSmokingAny", "SmkAtVisitPastmonth")
secondhandsmkdata<-prenatalexdata%>%filter_at(all_of(secondhandsmokevars), all_vars(!(. %in% c("Missing"))))
child_secondhand<-secondhandsmkdata %>% filter(childteen=='C')
teen_secondhand<-secondhandsmkdata%>%filter(childteen=='T')
twovisit_secondhand<-secondhandsmkdata %>% filter(idnum %in% child_secondhand$idnum & idnum %in% teen_secondhand$idnum)
child_smoke<-secondhandsmkdata %>% filter(k5f1l!= "2 no" & childteen=='C')
child_nosmoke<-secondhandsmkdata %>% filter(k5f1l=="2 no" & childteen=='C')
teen_smoke<-secondhandsmkdata %>% filter(k6d40!="2 No" & childteen=='T')
teen_nosmoke<-secondhandsmkdata %>% filter(k6d40=="2 No" & childteen=='T')
twovisit_nosmoke<-secondhandsmkdata %>% filter(idnum %in% child_nosmoke$idnum & idnum %in% teen_nosmoke$idnum)
fathersmkdata<-secondhandsmkdata %>% filter(f1g4!="Missing")
#Make Labels
n_fullFF<-paste0("Fragile Families cohort \n", "n=", FF_labeled %>% nrow())
n_methylQC<-paste0("Cohort with 450K methylation data \n", "n=", methylCohort$idnum%>%unique()%>%length(), "individuals with m=", methylCohort$MethID%>%length(), "samples")
n_QCloss<-paste0("Methylation data QC: 12 outlier samples dropped, \n", pdqc_all%>%filter(probe_fail_10==T)%>%nrow()-12, " samples with >10% of probes failing dropped, \n", pdqc_all%>%filter(sex!=predicted_sex & probe_fail_10==F & childteen!='M')%>%nrow(), " discordant sex samples dropped, \n & ", nrow(pdqc)-nrow(pdqc_clean), " duplicate samples dropped")
n_methylFF<-paste0("Cohort with quality controlled 450K data \n","n=", myFF %>% select(id) %>% unique() %>% nrow(), " individuals with m=", myFF%>%nrow(), " samples")
n_basemodel<-paste0("Base models: \n", "n=", basemodeldata %>% select(id) %>% unique() %>% nrow(), " individuals with m=", basemodeldata %>%nrow(), " samples")
n_baseexclusions<-paste0("n=", myFF %>% filter_at(all_of(basemodelvar), any_vars((. %in%c("Missing", "Missing PC data"))))%>% select(id) %>% unique() %>% nrow(), " individuals & m=", myFF %>% filter_at(all_of(basemodelvar), any_vars((. %in%c("Missing", "Missing PC data")))) %>% nrow(), " samples missing principal components of ancestry; \n n=", myFF %>% filter_at(all_of(basemodelvar), any_vars((is.na(.))))%>%select(id)%>%unique()%>%nrow(), " individuals & m=", myFF %>% filter_at(all_of(basemodelvar), any_vars((is.na(.))))%>%nrow(), " samples missing child age at maternal/PCG interview")
n_prenatalexposures<-paste0("Prenatal exposures models: \n","n=", prenatalexdata %>% select(id) %>% unique() %>% nrow(), " individuals with m=", prenatalexdata %>%nrow(), " samples")
n_prenatalexclusions<-paste0("n=", basemodeldata %>% filter_at(all_of(prenatalexposurevar), any_vars(. %in% c("Missing"))) %>% select(id) %>% unique() %>% nrow(), " individuals & m=", basemodeldata %>% filter_at(all_of(prenatalexposurevar), any_vars(. %in% c("Missing"))) %>% nrow(), " samples missing \n data on maternal prenatal alcohol and drug use")
n_secondhandSmk<-paste0("Secondhand smoke exposure models: \n","n=", secondhandsmkdata %>% select(id) %>% unique() %>% nrow(), " individuals with m=", secondhandsmkdata %>%nrow(), " samples")
n_secondhandexclusions<-paste0("n=", prenatalexdata$id[!(prenatalexdata$id %in% secondhandsmkdata$id)] %>% unique()%>% length(), " individuals & m=", prenatalexdata %>% filter_at(all_of(secondhandsmokevars), any_vars(. %in% c("Missing"))) %>% nrow(), " samples missing data \n on maternal or primary care giver postnatal smoking")
n_Child<-paste0('n=', child_secondhand %>% nrow(), " individuals with a 9 year visit")
n_ChildSmkexcluded<-paste0("Sensitivity analysis exclude children who smoke: \n",'n=', child_nosmoke %>% nrow(), " individuals with a 9 year visit")
n_ChildSmkexclusions<-paste0('n=', child_smoke%>%nrow(), " individual \n smoking at 9 year visit \n or missing focal child smoking data")
n_Teen<-paste0('n=', teen_secondhand %>% nrow(), " individuals with a 15 year visit")
n_TeenSmkexcluded<-paste0("Sensitivity analysis exclude children who smoke: \n",'n=', teen_nosmoke %>% nrow(), " individuals with a 15 year visit")
n_TeenSmkexclusions<-paste0('n=', teen_smoke%>%nrow(), " individuals \n smoking at 15 year visit \n or missing focal child smoking data")
n_sensFatherSmoking<-paste0("Sensitivity analysis: compare effects of maternal and paternal prenatal smoking \n", "n=", fathersmkdata %>% select(id) %>% unique() %>% nrow(), " individuals with m=", fathersmkdata %>%nrow(), " samples", "\n with prenatal paternal smoking data")
n_sensFatherSmokingEx<- paste0("n=", secondhandsmkdata$id[!(secondhandsmkdata$id %in% fathersmkdata$id)] %>% unique()%>% length(), " individuals & m=", secondhandsmkdata %>% filter(f1g4 %in% c("Missing")) %>% nrow(), " samples missing data on paternal prenatal smoking")
n_twovisits<-paste0("n=", twovisit_secondhand%>%pull(idnum) %>%unique() %>%length(), " individuals with both a 9 and 15 year visit")
n_twovisitsSmkExcluded<-paste0("Sensitivity analysis exclude children who smoke: \n", "n=", twovisit_nosmoke%>%pull(idnum) %>%unique() %>%length(), " individuals with both a 9 and 15 year visit")
n_twovisitSmkExclusions<-paste0("n=", (twovisit_secondhand%>%pull(idnum) %>%unique() %>%length())-(twovisit_nosmoke%>%pull(idnum) %>%unique() %>%length()), " individuals smoking at \n either 9 and 15 year visit or \n missing focal child smoking data")graph1<-DiagrammeR::grViz("
digraph graph2 {
compound=true;
graph [layout = dot]
node [shape = rectangle, width=4]
a [label = '@@1']
b [label = '@@2']
c [label = '@@3']
d [label = '@@4']
e [label = '@@5']
f [label = '@@6']
g [label = '@@7']
h [label = '@@8']
i [label = '@@9', group=g1]
j [label = '@@10']
k [label = '@@16']
l [label = '@@17']
m [label = '@@19']
#fake nodes for connecting to edges
node [shape=point, width=0.01, height=0.01]
a1
b1
c1
d1
# e1
f1 [group=g1]
g1
k1
#exclusion nodes
node [shape = rectangle, width=4]
aa [label = '@@20']
bb [label = '@@11']
cc [label = '@@12']
dd [label = '@@13']
ff [label = '@@14', width=3]
gg [label = '@@15', width=3]
kk [label = '@@18', width=3]
#draw connections
a->m
m->a1 [arrowhead=none]
a1->b
b->b1 [arrowhead=none]
b1->c
c->c1 [arrowhead=none]
c1->d
d->d1 [arrowhead=none]
d1->e
e->f; e->g; e->k
{rank=same; h->e [dir=back, minlen=2, style=dashed]}
f->f1 [arrowhead=none, style=dashed]
g->g1 [arrowhead=none, style=dashed]
k->k1 [arrowhead=none, style=dashed]
f1->i [style=dashed]
g1->j [style=dashed]
k1->l [style=dashed]
{rank=same; a1->aa [minlen=2]}
{rank=same; b1->bb [minlen=2]}
{rank=same; c1->cc [minlen=2]}
{rank=same; d1->dd [minlen=2]}
{rank=same; f1->ff [minlen=2]}
{rank=same; g1->gg [minlen=2]}
{rank=same; k1->kk [minlen=2]}
#invisible constraints to force ordering
ff->i [style=invis]
}
[1]: n_fullFF
[2]: n_methylFF
[3]: n_basemodel
[4]: n_prenatalexposures
[5]: n_secondhandSmk
[6]: n_Child
[7]: n_Teen
[8]: n_sensFatherSmoking
[9]: n_ChildSmkexcluded
[10]: n_TeenSmkexcluded
[11]: n_baseexclusions
[12]: n_prenatalexclusions
[13]: n_secondhandexclusions
[14]: n_ChildSmkexclusions
[15]: n_TeenSmkexclusions
[16]: n_twovisits
[17]: n_twovisitsSmkExcluded
[18]: n_twovisitSmkExclusions
[19]: n_methylQC
[20]: n_QCloss
")Cell type proportions do differ between 9 and 15 year visits and although global methylation does not look different between these visits, methylation clocks do (a good check), although, interestingly, cell type proportion does correlate with methylation age.
methylation_data<-myFF %>% select(colnames(methyldata))%>%select(-MethID, -childteen, -idnum, -slide, -array)
chart.Correlation(methylation_data, histogram=T)chart.Correlation(methylation_data %>%select(SSolder_notransform, SSnewbornCT_notransform, SSnewborn_notransform, anynewborn_notransform))Thought this was a little weird, investigated the actual coefficients for each score:
coefs_methyl<-cbind('SSolderCoefs'=cpgs$SSolder$Coef, 'SSnewbornCTCoefs'=cpgs$SSnewbornCT$Coef, 'SSnewbornCoefs'=cpgs$SSnewborn$Coef, 'anynewbornCoefs'=cpgs$anynewborn$Coef)
chart.Correlation(coefs_methyl)PMS<-methyldata %>%select(idnum, childteen, colnames(polyscores_wide), -MethID)%>%pivot_longer(cols=any_of(colnames(polyscores_wide)), names_to="PMS_type", values_to="PolymethylationScore")%>%pivot_wider(names_from = childteen, values_from=PolymethylationScore)
ggplot(PMS, aes(x=C, y=T))+geom_point()+facet_grid(~PMS_type)+stat_cor()+geom_smooth(method=lm)gg_newborn<-ggplot(methyldata, aes(x=childteen, y=SSnewbornCT_center, group=idnum))+geom_point()+geom_line(alpha=0.2)+geom_smooth(aes(group=1), method='lm')+ggtitle("SS newborn (+CT)")
gg_older<-ggplot(methyldata, aes(x=childteen, y=SSolder_center, group=idnum))+geom_point()+geom_line(alpha=0.2)+geom_smooth(aes(group=1), method='lm')+ggtitle("SS older")
plot_grid(gg_newborn, gg_older, nrow = 2)## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
gg_newborn<-ggplot(myFF, aes(x=childteen, y=SSnewbornCT_center, group=idnum, color=smkPreg_binary))+geom_point()+geom_line(alpha=0.2)+geom_smooth(aes(group=1),color='black', method='lm')+facet_wrap(~smkPreg_binary)
gg_older<-ggplot(myFF, aes(x=childteen, y=SSolder_center, group=idnum, color=smkPreg_binary))+geom_point()+geom_line(alpha=0.2)+geom_smooth(aes(group=1),color='black', method='lm')+facet_wrap(~smkPreg_binary)
plot_grid(gg_newborn, gg_older, nrow=2)## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
PMS<-left_join(methyldata %>%select(idnum, childteen, colnames(polyscores_wide), -MethID)%>%pivot_longer(cols=any_of(colnames(polyscores_wide)), names_to="PMS_type", values_to="PolymethylationScore"), myFF[, c('idnum', 'smkPreg_binary')]%>%unique())## Joining, by = "idnum"
ggplot(PMS, aes(x=childteen, y=PolymethylationScore, color=smkPreg_binary))+geom_boxplot()+facet_wrap(~PMS_type)+stat_compare_means(label='p.signif', label.y=4)smk_all<-strataTable(createTable(compareGroups(smkPreg_binary~globalmethylation+SSnewbornCT_notransform+SSolder_notransform+cg05575921+pediatric+levine+Epi+Fib+IC+cm1inpov+cm1ethrace+cm1bsex+m1g2_YesNoPreg+m1g3_YesNoPreg+PostnatalMaternalSmokingAny+SmkAtVisitPastmonth+k5f1l+k6d40+f1g4, data=tabledata, simplify = F), show.all=F), 'childteen')
export2md(smk_all)| No | Yes | p.overall | No | Yes | p.overall | |
|---|---|---|---|---|---|---|
| N=574 | N=149 | N=583 | N=137 | |||
| Global methylation | 0.48 (0.01) | 0.48 (0.01) | 0.377 | 0.48 (0.01) | 0.48 (0.01) | 0.145 |
| PES: Sustained smoking, newborn cordblood (+CT control) | 2.58 (0.22) | 2.72 (0.23) | <0.001 | 2.62 (0.22) | 2.74 (0.23) | <0.001 |
| PES: Sustained smoking, older children peripheral blood | 2.35 (0.24) | 2.46 (0.25) | <0.001 | 2.35 (0.28) | 2.45 (0.28) | <0.001 |
| cg05575291 methylation | 0.78 (0.05) | 0.77 (0.06) | 0.022 | 0.77 (0.06) | 0.76 (0.06) | 0.323 |
| pediatric methylation clock | 8.98 (0.85) | 9.02 (1.23) | 0.752 | 11.6 (1.74) | 11.6 (1.79) | 0.865 |
| levine methylation clock | 6.44 (5.79) | 6.82 (6.33) | 0.500 | 16.4 (6.21) | 16.4 (6.13) | 0.981 |
| Epithelial cell proportion | 0.27 (0.13) | 0.26 (0.12) | 0.199 | 0.30 (0.16) | 0.30 (0.16) | 0.724 |
| Fibroid cell proportion | 0.02 (0.01) | 0.02 (0.01) | 0.946 | 0.01 (0.01) | 0.01 (0.01) | 0.704 |
| Immune cell proportion | 0.71 (0.12) | 0.73 (0.12) | 0.182 | 0.68 (0.15) | 0.69 (0.15) | 0.697 |
| cm1inpov Constructed - Poverty ratio - mother’s household income/poverty threshold | 2.41 (2.56) | 1.45 (1.48) | <0.001 | 2.41 (2.57) | 1.37 (1.44) | <0.001 |
| cm1ethrace Mother race (baseline own report): | <0.001 | 0.004 | ||||
| 1 white, non-hispanic | 98 (17.1%) | 46 (30.9%) | 101 (17.3%) | 41 (29.9%) | ||
| 2 black, non-hispanic | 324 (56.4%) | 82 (55.0%) | 333 (57.1%) | 76 (55.5%) | ||
| 3 hispanic | 131 (22.8%) | 17 (11.4%) | 127 (21.8%) | 16 (11.7%) | ||
| 4 other | 19 (3.31%) | 4 (2.68%) | 20 (3.43%) | 4 (2.92%) | ||
| Missing | 2 (0.35%) | 0 (0.00%) | 2 (0.34%) | 0 (0.00%) | ||
| cm1bsex Constructed - Focal baby’s gender: | 0.581 | 0.704 | ||||
| 1 Boy | 286 (49.8%) | 70 (47.0%) | 280 (48.0%) | 63 (46.0%) | ||
| 2 Girl | 288 (50.2%) | 79 (53.0%) | 303 (52.0%) | 74 (54.0%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| Any alcohol during pregnancy: | <0.001 | <0.001 | ||||
| No | 531 (92.5%) | 102 (68.5%) | 543 (93.1%) | 95 (69.3%) | ||
| Yes | 43 (7.49%) | 47 (31.5%) | 40 (6.86%) | 42 (30.7%) | ||
| Any drugs during pregnancy: | <0.001 | <0.001 | ||||
| No | 564 (98.3%) | 123 (82.6%) | 574 (98.5%) | 109 (79.6%) | ||
| Yes | 10 (1.74%) | 26 (17.4%) | 9 (1.54%) | 28 (20.4%) | ||
| Any postnatal smoking (age 1 & 5): | <0.001 | <0.001 | ||||
| Maternal smoking at age 1 or age 5 | 146 (25.4%) | 142 (95.3%) | 151 (25.9%) | 130 (94.9%) | ||
| No maternal smoking at age 1 and age 5 | 428 (74.6%) | 7 (4.70%) | 432 (74.1%) | 7 (5.11%) | ||
| Mother/PCG (9/15 yr) smoking in month prior to interview (pk/day): | <0.001 | <0.001 | ||||
| Less than pack a day | 91 (15.9%) | 82 (55.0%) | 82 (14.1%) | 75 (54.7%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| No smoking | 458 (79.8%) | 23 (15.4%) | 490 (84.0%) | 43 (31.4%) | ||
| Pack or more a day | 25 (4.36%) | 44 (29.5%) | 11 (1.89%) | 19 (13.9%) | ||
| k5f1l F1L. Smoked a cigarette or used tobacco: | . | 0.839 | ||||
| 1 yes | 0 (0.00%) | 0 (0.00%) | 3 (0.51%) | 0 (0.00%) | ||
| 2 no | 574 (100%) | 149 (100%) | 571 (97.9%) | 136 (99.3%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 9 (1.54%) | 1 (0.73%) | ||
| k6d40 ID: | 0.003 | . | ||||
| 1 Yes | 17 (2.96%) | 14 (9.40%) | 0 (0.00%) | 0 (0.00%) | ||
| 2 No | 552 (96.2%) | 133 (89.3%) | 583 (100%) | 137 (100%) | ||
| Missing | 5 (0.87%) | 2 (1.34%) | 0 (0.00%) | 0 (0.00%) | ||
| f1g4 In the past 3 months, how many cigarettes did you smoke a day?: | <0.001 | <0.001 | ||||
| 1 2+pk/d | 8 (1.39%) | 3 (2.01%) | 7 (1.20%) | 3 (2.19%) | ||
| 2 1<pk<2 | 42 (7.32%) | 20 (13.4%) | 41 (7.03%) | 20 (14.6%) | ||
| 3 <1pk/d | 117 (20.4%) | 57 (38.3%) | 123 (21.1%) | 49 (35.8%) | ||
| 4 None | 316 (55.1%) | 43 (28.9%) | 317 (54.4%) | 41 (29.9%) | ||
| Missing | 91 (15.9%) | 26 (17.4%) | 95 (16.3%) | 24 (17.5%) |
Note hypomethylation in exposed children at cg05575291 (AHHR) gene, which is more strongly associated at the Teen visit than the Child visit…. although not a very small p-value - check with Kelly - is this concerning?
Additionally, cell type proportion does not appear to associate with main predictor of binary prenatal smoking variable.
These tables use the base model group from the above CONSORT diagram (n=844).
smkA<-strataTable(createTable(compareGroups(smkPreg_binary~globalmethylation+SSnewbornCT_notransform+SSolder_notransform+cg05575921+pediatric+levine+Epi+Fib+IC+cm1inpov+cm1ethrace+cm1bsex+m1g2_YesNoPreg+m1g3_YesNoPreg+PostnatalMaternalSmokingAny+SmkAtVisitPastmonth+k5f1l+k6d40+f1g4, data=tabledata%>%filter(ancestry=='African ancestry')%>%copy_labels(tabledata)), show.all=F), 'childteen')
export2md(smkA, caption="Smoked during pregnancy vs covariates: African ancestry only")| No | Yes | p.overall | No | Yes | p.overall | |
|---|---|---|---|---|---|---|
| N=356 | N=96 | N=363 | N=86 | |||
| Global methylation | 0.48 (0.01) | 0.48 (0.01) | 0.612 | 0.48 (0.01) | 0.48 (0.01) | 0.471 |
| PES: Sustained smoking, newborn cordblood (+CT control) | 2.59 (0.23) | 2.73 (0.22) | <0.001 | 2.63 (0.22) | 2.76 (0.21) | <0.001 |
| PES: Sustained smoking, older children peripheral blood | 2.34 (0.25) | 2.44 (0.26) | 0.001 | 2.35 (0.29) | 2.41 (0.31) | 0.068 |
| cg05575291 methylation | 0.78 (0.05) | 0.76 (0.06) | 0.010 | 0.77 (0.06) | 0.75 (0.07) | 0.082 |
| pediatric methylation clock | 9.06 (0.80) | 9.16 (1.41) | 0.509 | 11.7 (1.74) | 12.0 (1.89) | 0.303 |
| levine methylation clock | 6.53 (6.07) | 7.30 (6.83) | 0.318 | 16.4 (6.16) | 16.7 (6.50) | 0.707 |
| Epithelial cell proportion | 0.29 (0.13) | 0.28 (0.13) | 0.486 | 0.31 (0.16) | 0.32 (0.17) | 0.511 |
| Fibroid cell proportion | 0.02 (0.01) | 0.02 (0.01) | 0.780 | 0.01 (0.01) | 0.01 (0.01) | 0.729 |
| Immune cell proportion | 0.70 (0.13) | 0.71 (0.12) | 0.457 | 0.67 (0.16) | 0.66 (0.16) | 0.507 |
| cm1inpov Constructed - Poverty ratio - mother’s household income/poverty threshold | 1.93 (1.92) | 1.18 (1.28) | <0.001 | 1.92 (1.91) | 1.06 (1.12) | <0.001 |
| cm1ethrace Mother race (baseline own report): | 0.021 | 0.228 | ||||
| 1 white, non-hispanic | 10 (2.81%) | 8 (8.33%) | 9 (2.48%) | 4 (4.65%) | ||
| 2 black, non-hispanic | 320 (89.9%) | 82 (85.4%) | 329 (90.6%) | 76 (88.4%) | ||
| 3 hispanic | 20 (5.62%) | 2 (2.08%) | 18 (4.96%) | 2 (2.33%) | ||
| 4 other | 5 (1.40%) | 4 (4.17%) | 6 (1.65%) | 4 (4.65%) | ||
| Missing | 1 (0.28%) | 0 (0.00%) | 1 (0.28%) | 0 (0.00%) | ||
| cm1bsex Constructed - Focal baby’s gender: | 0.908 | 0.905 | ||||
| 1 Boy | 179 (50.3%) | 47 (49.0%) | 180 (49.6%) | 42 (48.8%) | ||
| 2 Girl | 177 (49.7%) | 49 (51.0%) | 183 (50.4%) | 44 (51.2%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| Any alcohol during pregnancy: | <0.001 | <0.001 | ||||
| No | 333 (93.5%) | 64 (66.7%) | 340 (93.7%) | 59 (68.6%) | ||
| Yes | 23 (6.46%) | 32 (33.3%) | 23 (6.34%) | 27 (31.4%) | ||
| Any drugs during pregnancy: | <0.001 | <0.001 | ||||
| No | 348 (97.8%) | 77 (80.2%) | 356 (98.1%) | 67 (77.9%) | ||
| Yes | 8 (2.25%) | 19 (19.8%) | 7 (1.93%) | 19 (22.1%) | ||
| Any postnatal smoking (age 1 & 5): | <0.001 | <0.001 | ||||
| Maternal smoking at age 1 or age 5 | 93 (26.1%) | 93 (96.9%) | 95 (26.2%) | 83 (96.5%) | ||
| No maternal smoking at age 1 and age 5 | 263 (73.9%) | 3 (3.12%) | 268 (73.8%) | 3 (3.49%) | ||
| Mother/PCG (9/15 yr) smoking in month prior to interview (pk/day): | <0.001 | <0.001 | ||||
| Less than pack a day | 61 (17.1%) | 53 (55.2%) | 59 (16.3%) | 55 (64.0%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| No smoking | 279 (78.4%) | 12 (12.5%) | 299 (82.4%) | 21 (24.4%) | ||
| Pack or more a day | 16 (4.49%) | 31 (32.3%) | 5 (1.38%) | 10 (11.6%) | ||
| k5f1l F1L. Smoked a cigarette or used tobacco: | . | 1.000 | ||||
| 1 yes | 0 (0.00%) | 0 (0.00%) | 1 (0.28%) | 0 (0.00%) | ||
| 2 no | 356 (100%) | 96 (100%) | 357 (98.3%) | 85 (98.8%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 5 (1.38%) | 1 (1.16%) | ||
| k6d40 ID: | 0.002 | . | ||||
| 1 Yes | 6 (1.69%) | 9 (9.38%) | 0 (0.00%) | 0 (0.00%) | ||
| 2 No | 346 (97.2%) | 86 (89.6%) | 363 (100%) | 86 (100%) | ||
| Missing | 4 (1.12%) | 1 (1.04%) | 0 (0.00%) | 0 (0.00%) | ||
| f1g4 In the past 3 months, how many cigarettes did you smoke a day?: | <0.001 | 0.001 | ||||
| 1 2+pk/d | 7 (1.97%) | 0 (0.00%) | 6 (1.65%) | 0 (0.00%) | ||
| 2 1<pk<2 | 27 (7.58%) | 9 (9.38%) | 23 (6.34%) | 11 (12.8%) | ||
| 3 <1pk/d | 82 (23.0%) | 40 (41.7%) | 86 (23.7%) | 32 (37.2%) | ||
| 4 None | 182 (51.1%) | 27 (28.1%) | 184 (50.7%) | 24 (27.9%) | ||
| Missing | 58 (16.3%) | 20 (20.8%) | 64 (17.6%) | 19 (22.1%) |
smkE<-strataTable(createTable(compareGroups(smkPreg_binary~globalmethylation+SSnewbornCT_notransform+SSolder_notransform+cg05575921+pediatric+levine+Epi+Fib+IC+cm1inpov+cm1ethrace+cm1bsex+m1g2_YesNoPreg+m1g3_YesNoPreg+PostnatalMaternalSmokingAny+SmkAtVisitPastmonth+k5f1l+k6d40+f1g4, simplify=FALSE, data=tabledata%>%filter(ancestry=='European ancestry' & SmkAtVisitPastmonth!="Missing")%>%copy_labels(tabledata)), show.all=TRUE), 'childteen')
export2md(smkE, caption="Smoked during pregnancy vs covariates: European ancestry only")| [ALL] | No | Yes | p.overall | [ALL] | No | Yes | p.overall | |
|---|---|---|---|---|---|---|---|---|
| N=115 | N=81 | N=34 | N=119 | N=86 | N=33 | |||
| Global methylation | 0.48 (0.01) | 0.48 (0.01) | 0.48 (0.01) | 0.336 | 0.48 (0.01) | 0.48 (0.01) | 0.48 (0.01) | 0.928 |
| PES: Sustained smoking, newborn cordblood (+CT control) | 2.61 (0.24) | 2.56 (0.21) | 2.73 (0.26) | 0.001 | 2.64 (0.27) | 2.61 (0.26) | 2.73 (0.28) | 0.037 |
| PES: Sustained smoking, older children peripheral blood | 2.39 (0.24) | 2.35 (0.21) | 2.51 (0.25) | 0.001 | 2.42 (0.24) | 2.39 (0.23) | 2.48 (0.25) | 0.060 |
| cg05575291 methylation | 0.78 (0.05) | 0.78 (0.05) | 0.77 (0.05) | 0.453 | 0.78 (0.05) | 0.79 (0.05) | 0.77 (0.05) | 0.102 |
| pediatric methylation clock | 8.85 (0.66) | 8.87 (0.64) | 8.82 (0.71) | 0.695 | 11.1 (1.48) | 11.1 (1.54) | 11.2 (1.34) | 0.793 |
| levine methylation clock | 5.56 (4.79) | 5.59 (4.69) | 5.48 (5.08) | 0.917 | 14.8 (5.58) | 14.4 (5.47) | 15.9 (5.80) | 0.203 |
| Epithelial cell proportion | 0.26 (0.12) | 0.27 (0.11) | 0.24 (0.12) | 0.143 | 0.27 (0.13) | 0.27 (0.13) | 0.27 (0.12) | 0.952 |
| Fibroid cell proportion | 0.02 (0.01) | 0.01 (0.01) | 0.02 (0.01) | 0.309 | 0.01 (0.01) | 0.01 (0.01) | 0.01 (0.01) | 0.489 |
| Immune cell proportion | 0.72 (0.11) | 0.71 (0.11) | 0.75 (0.11) | 0.150 | 0.72 (0.12) | 0.71 (0.13) | 0.72 (0.11) | 0.911 |
| cm1inpov Constructed - Poverty ratio - mother’s household income/poverty threshold | 4.48 (3.38) | 5.33 (3.51) | 2.45 (1.89) | <0.001 | 4.45 (3.39) | 5.23 (3.51) | 2.44 (1.95) | <0.001 |
| cm1ethrace Mother race (baseline own report): | 0.580 | 0.307 | ||||||
| 1 white, non-hispanic | 111 (96.5%) | 79 (97.5%) | 32 (94.1%) | 115 (96.6%) | 84 (97.7%) | 31 (93.9%) | ||
| 2 black, non-hispanic | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| 3 hispanic | 4 (3.48%) | 2 (2.47%) | 2 (5.88%) | 4 (3.36%) | 2 (2.33%) | 2 (6.06%) | ||
| 4 other | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| cm1bsex Constructed - Focal baby’s gender: | 0.310 | 0.414 | ||||||
| 1 Boy | 53 (46.1%) | 40 (49.4%) | 13 (38.2%) | 51 (42.9%) | 39 (45.3%) | 12 (36.4%) | ||
| 2 Girl | 62 (53.9%) | 41 (50.6%) | 21 (61.8%) | 68 (57.1%) | 47 (54.7%) | 21 (63.6%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| Any alcohol during pregnancy: | 0.385 | 0.048 | ||||||
| No | 92 (80.0%) | 67 (82.7%) | 25 (73.5%) | 98 (82.4%) | 75 (87.2%) | 23 (69.7%) | ||
| Yes | 23 (20.0%) | 14 (17.3%) | 9 (26.5%) | 21 (17.6%) | 11 (12.8%) | 10 (30.3%) | ||
| Any drugs during pregnancy: | 0.002 | <0.001 | ||||||
| No | 110 (95.7%) | 81 (100%) | 29 (85.3%) | 112 (94.1%) | 86 (100%) | 26 (78.8%) | ||
| Yes | 5 (4.35%) | 0 (0.00%) | 5 (14.7%) | 7 (5.88%) | 0 (0.00%) | 7 (21.2%) | ||
| Any postnatal smoking (age 1 & 5): | <0.001 | <0.001 | ||||||
| Maternal smoking at age 1 or age 5 | 54 (47.0%) | 23 (28.4%) | 31 (91.2%) | 52 (43.7%) | 22 (25.6%) | 30 (90.9%) | ||
| No maternal smoking at age 1 and age 5 | 61 (53.0%) | 58 (71.6%) | 3 (8.82%) | 67 (56.3%) | 64 (74.4%) | 3 (9.09%) | ||
| Mother/PCG (9/15 yr) smoking in month prior to interview (pk/day): | <0.001 | <0.001 | ||||||
| Less than pack a day | 32 (27.8%) | 14 (17.3%) | 18 (52.9%) | 20 (16.8%) | 8 (9.30%) | 12 (36.4%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| No smoking | 66 (57.4%) | 60 (74.1%) | 6 (17.6%) | 85 (71.4%) | 73 (84.9%) | 12 (36.4%) | ||
| Pack or more a day | 17 (14.8%) | 7 (8.64%) | 10 (29.4%) | 14 (11.8%) | 5 (5.81%) | 9 (27.3%) | ||
| k5f1l F1L. Smoked a cigarette or used tobacco: | . | 0.681 | ||||||
| 1 yes | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 1 (0.84%) | 1 (1.16%) | 0 (0.00%) | ||
| 2 no | 115 (100%) | 81 (100%) | 34 (100%) | 115 (96.6%) | 82 (95.3%) | 33 (100%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 3 (2.52%) | 3 (3.49%) | 0 (0.00%) | ||
| k6d40 ID: | 0.322 | . | ||||||
| 1 Yes | 6 (5.22%) | 3 (3.70%) | 3 (8.82%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| 2 No | 107 (93.0%) | 77 (95.1%) | 30 (88.2%) | 119 (100%) | 86 (100%) | 33 (100%) | ||
| Missing | 2 (1.74%) | 1 (1.23%) | 1 (2.94%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| f1g4 In the past 3 months, how many cigarettes did you smoke a day?: | <0.001 | <0.001 | ||||||
| 1 2+pk/d | 3 (2.61%) | 1 (1.23%) | 2 (5.88%) | 3 (2.52%) | 1 (1.16%) | 2 (6.06%) | ||
| 2 1<pk<2 | 14 (12.2%) | 4 (4.94%) | 10 (29.4%) | 14 (11.8%) | 6 (6.98%) | 8 (24.2%) | ||
| 3 <1pk/d | 21 (18.3%) | 11 (13.6%) | 10 (29.4%) | 23 (19.3%) | 13 (15.1%) | 10 (30.3%) | ||
| 4 None | 60 (52.2%) | 53 (65.4%) | 7 (20.6%) | 63 (52.9%) | 55 (64.0%) | 8 (24.2%) | ||
| Missing | 17 (14.8%) | 12 (14.8%) | 5 (14.7%) | 16 (13.4%) | 11 (12.8%) | 5 (15.2%) |
smkH<-strataTable(createTable(compareGroups(smkPreg_binary~globalmethylation+SSnewbornCT_notransform+SSolder_notransform+cg05575921+pediatric+levine+Epi+Fib+IC+cm1inpov+cm1ethrace+cm1bsex+m1g2_YesNoPreg+m1g3_YesNoPreg+PostnatalMaternalSmokingAny+SmkAtVisitPastmonth+k5f1l+k6d40+f1g4, data=tabledata%>%filter(ancestry=='Hispanic ancestry')%>%copy_labels(tabledata)), show.all=F), 'childteen')
export2md(smkH, caption="Smoked during pregnancy vs covariates: Hispanic ancestry only")| No | Yes | p.overall | No | Yes | p.overall | |
|---|---|---|---|---|---|---|
| N=137 | N=19 | N=134 | N=18 | |||
| Global methylation | 0.48 (0.01) | 0.48 (0.01) | 0.611 | 0.48 (0.01) | 0.48 (0.01) | 0.034 |
| PES: Sustained smoking, newborn cordblood (+CT control) | 2.56 (0.21) | 2.63 (0.21) | 0.203 | 2.60 (0.20) | 2.68 (0.18) | 0.085 |
| PES: Sustained smoking, older children peripheral blood | 2.37 (0.22) | 2.49 (0.21) | 0.029 | 2.34 (0.26) | 2.56 (0.15) | <0.001 |
| cg05575291 methylation | 0.78 (0.05) | 0.79 (0.05) | 0.481 | 0.76 (0.06) | 0.80 (0.06) | 0.028 |
| pediatric methylation clock | 8.85 (1.05) | 8.65 (0.79) | 0.335 | 11.5 (1.82) | 10.8 (1.60) | 0.086 |
| levine methylation clock | 6.71 (5.63) | 6.85 (5.60) | 0.923 | 17.7 (6.50) | 15.9 (4.99) | 0.165 |
| Epithelial cell proportion | 0.25 (0.11) | 0.22 (0.11) | 0.365 | 0.30 (0.15) | 0.21 (0.11) | 0.007 |
| Fibroid cell proportion | 0.02 (0.01) | 0.01 (0.01) | 0.404 | 0.01 (0.01) | 0.01 (0.01) | 0.622 |
| Immune cell proportion | 0.74 (0.11) | 0.77 (0.11) | 0.323 | 0.69 (0.14) | 0.77 (0.11) | 0.007 |
| cm1inpov Constructed - Poverty ratio - mother’s household income/poverty threshold | 1.93 (2.23) | 1.00 (0.62) | <0.001 | 1.95 (2.29) | 0.89 (0.53) | <0.001 |
| cm1ethrace Mother race (baseline own report): | 0.020 | 0.011 | ||||
| 1 white, non-hispanic | 9 (6.57%) | 6 (31.6%) | 8 (5.97%) | 6 (33.3%) | ||
| 2 black, non-hispanic | 4 (2.92%) | 0 (0.00%) | 4 (2.99%) | 0 (0.00%) | ||
| 3 hispanic | 109 (79.6%) | 13 (68.4%) | 107 (79.9%) | 12 (66.7%) | ||
| 4 other | 14 (10.2%) | 0 (0.00%) | 14 (10.4%) | 0 (0.00%) | ||
| Missing | 1 (0.73%) | 0 (0.00%) | 1 (0.75%) | 0 (0.00%) | ||
| cm1bsex Constructed - Focal baby’s gender: | 0.810 | 0.803 | ||||
| 1 Boy | 67 (48.9%) | 10 (52.6%) | 61 (45.5%) | 9 (50.0%) | ||
| 2 Girl | 70 (51.1%) | 9 (47.4%) | 73 (54.5%) | 9 (50.0%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| Any alcohol during pregnancy: | 0.001 | 0.004 | ||||
| No | 131 (95.6%) | 13 (68.4%) | 128 (95.5%) | 13 (72.2%) | ||
| Yes | 6 (4.38%) | 6 (31.6%) | 6 (4.48%) | 5 (27.8%) | ||
| Any drugs during pregnancy: | 0.073 | 0.069 | ||||
| No | 135 (98.5%) | 17 (89.5%) | 132 (98.5%) | 16 (88.9%) | ||
| Yes | 2 (1.46%) | 2 (10.5%) | 2 (1.49%) | 2 (11.1%) | ||
| Any postnatal smoking (age 1 & 5): | <0.001 | <0.001 | ||||
| Maternal smoking at age 1 or age 5 | 30 (21.9%) | 18 (94.7%) | 34 (25.4%) | 17 (94.4%) | ||
| No maternal smoking at age 1 and age 5 | 107 (78.1%) | 1 (5.26%) | 100 (74.6%) | 1 (5.56%) | ||
| Mother/PCG (9/15 yr) smoking in month prior to interview (pk/day): | <0.001 | 0.002 | ||||
| Less than pack a day | 16 (11.7%) | 11 (57.9%) | 15 (11.2%) | 8 (44.4%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| No smoking | 119 (86.9%) | 5 (26.3%) | 118 (88.1%) | 10 (55.6%) | ||
| Pack or more a day | 2 (1.46%) | 3 (15.8%) | 1 (0.75%) | 0 (0.00%) | ||
| k5f1l F1L. Smoked a cigarette or used tobacco: | . | 1.000 | ||||
| 1 yes | 0 (0.00%) | 0 (0.00%) | 1 (0.75%) | 0 (0.00%) | ||
| 2 no | 137 (100%) | 19 (100%) | 132 (98.5%) | 18 (100%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 1 (0.75%) | 0 (0.00%) | ||
| k6d40 ID: | 0.350 | . | ||||
| 1 Yes | 8 (5.84%) | 2 (10.5%) | 0 (0.00%) | 0 (0.00%) | ||
| 2 No | 129 (94.2%) | 17 (89.5%) | 134 (100%) | 18 (100%) | ||
| Missing | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | 0 (0.00%) | ||
| f1g4 In the past 3 months, how many cigarettes did you smoke a day?: | 0.054 | 0.020 | ||||
| 1 2+pk/d | 0 (0.00%) | 1 (5.26%) | 0 (0.00%) | 1 (5.56%) | ||
| 2 1<pk<2 | 11 (8.03%) | 1 (5.26%) | 12 (8.96%) | 1 (5.56%) | ||
| 3 <1pk/d | 24 (17.5%) | 7 (36.8%) | 24 (17.9%) | 7 (38.9%) | ||
| 4 None | 81 (59.1%) | 9 (47.4%) | 78 (58.2%) | 9 (50.0%) | ||
| Missing | 21 (15.3%) | 1 (5.26%) | 20 (14.9%) | 0 (0.00%) |
When stratified by ancestry, relationship between cg05575291 methylation and smoking remains in African ancestry group, pattern of hypomethylation exists in teen measures of European ancestry group but not child measures, and actually non-significant hypermethylation in Hispanic ancestry group.
At child and teen visit, variables to do with Father and Mother hispanic identity and ethnicity associated with global methylation, as did baby sex.
Some maternal/primary care giver smoking variables from year 1, 9 and 15 also associated. A note on the question PCG Year 9 Smoke and related questions n5f17 and n5f18 - these variables are weird, with lots of missing values, need to check the FF question guide, but my impression is that they were only asked to children for whom the PCG was not the mom (PCG and Core mother survey were combined in wave 6 - year 15).
Base model controls for ancestry, childsex, maternal income to poverty ratio, Epi cells, IC cells, and child age in months at sample.
#define function for use in purr
apriori_fun = function(response, dat, groupF, vars){
exclude=ifelse(groupF=='C', 'cp6yagem', ifelse(groupF=='T', 'cm5b_age', NA))
vars=vars[vars!=exclude]
form=paste0(response, "~", paste(vars, collapse='+'))
tidy(lm(as.formula(form), data=dat %>% filter(childteen==groupF)), conf.int=T, conf.level=0.95)
}
#read in top CpGs from Table One of Wicklund et al 2019
topCPGs<-read.csv(paste0(datadir, "TopCpGsWiklundEtAl2019.csv"))
topCPGs<-topCPGs %>% filter(CpG %in% aprioriCG)%>% mutate(estimate=as.numeric(gsub("−\\s", "-", gsub(" .*", "", β..SE.))), std.error=as.numeric(sub("\\).*", "", sub(".*\\(", "", β..SE.))), p.value=P.value, source="Wiklund et al. 2019", conf.low=(estimate-1.96*std.error), conf.high=(estimate+1.96*std.error))%>%select(CpG, Chr, Position, Gene, estimate, std.error, p.value, conf.low, conf.high, source)
#purr over the aprioriCpGs
vars=aprioriCG
vars=purrr::set_names(vars)
#ages=c('C', 'T')
#ages=purrr::set_names(ages)
models_C= vars%>% purrr::map(apriori_fun, dat=basemodeldata, groupF='C', vars=basemodelvar)
models_T= vars%>% purrr::map(apriori_fun, dat=basemodeldata, groupF='T', vars=basemodelvar)
models=list('child'=models_C, 'teens'=models_T)
#pull out list of tidy models into a datafram
modeldf<-left_join(bind_rows(unlist(models, recursive=F), .id="id") %>% filter(term=='smkPreg_binaryYes')%>%mutate(source=paste0("Fragile Families: ", age=gsub("\\..*", "", id)), CpG=gsub(".*\\.", "", id)), topCPGs %>%select(CpG, Chr, Position, Gene))%>%select(CpG, Chr, Position, Gene, estimate, std.error, p.value, conf.low, conf.high, source)## Joining, by = "CpG"
#combine
modeldf<-rbind(modeldf, topCPGs)%>%mutate(label=paste0(Gene, ": ", CpG))
#plot
ggplot(modeldf, aes(x=estimate, xmin=conf.low, xmax=conf.high, color=source, y=label))+geom_point(position=position_dodge(width=1))+geom_errorbarh(height=0.1, position=position_dodge(width=1))+scale_x_continuous()+scale_y_discrete(name="CpGs (a priori)")+geom_vline(xintercept=0, color='red', linetype="dashed", alpha=0.5)+ggtitle("Wiklund 2019 vs Fragile Families at 4 selected a priori CpGs: \n Base model")Variables
Base model: Binary smoking variable [X], ancestry categorization, baby sex [X], maternal income to poverty ratio [X], child’s age in month at visit of interest (note, currently using a constructed child’s age at visit, where individuals having a missing child’s age at home visit was supplemented with child’s age at home interview) [X], cell types (Epi + IC, may need to take one out),
Prenatal exposures: Base model variables + prenatal maternal alcohol use (Yes/no) [X], prenatal maternal drug use (yes/no)
Secondhand smoke exposure models: Prenatal exposures model+ any maternal smoking at age 1 & 5 [X], maternal or PCG smoking dose at visit of interest (age 9 or 15) [X]
First hand smoking: Secondhand smoke exposure models + exclude children with missing or reported firsthand smoking at visit of interest (age 9 or 15)
Paternal smoking: Filter to individuals with data on prenatal paternal smoking and compare effect size of prenatal paternal to prenatal maternal (as per Wiklund et al. 2019, idea is that this captures additional confounding)
Ancestry specific models - by ancestry groups; replace ancestry control with control for local PCs within ancestry group
Longitduinal models - no additional filtering needed
save(child_base, file=paste0(datadir, 'CreatedData/', 'ChildBaseModelData.Rdata'))
save(teen_base, file=paste0(datadir, 'CreatedData/', 'ChildBaseModelData.Rdata'))
save(basemodeldata, file=paste0(datadir, 'CreatedData/', 'BaseModelData.Rdata'))
save(child_prenatal, file=paste0(datadir, 'CreatedData/', 'ChildPrenatalExModelData.Rdata'))
save(teen_prenatal, file=paste0(datadir, 'CreatedData/', 'PrenatalExModelData.Rdata'))
save(prenatalexdata, file=paste0(datadir, 'CreatedData/', 'PrenatalExModelData.Rdata'))
save(child_secondhand, file=paste0(datadir, 'CreatedData/', 'ChildSecondHandSmkModelData.Rdata'))
save(teen_secondhand, file=paste0(datadir, 'CreatedData/', 'TeenSecondHandSmkModelData.Rdata'))
save(twovisit_secondhand, file=paste0(datadir, 'CreatedData/', 'TwoVisitSecondHandSmkModelData.Rdata'))
save(secondhandsmkdata, file=paste0(datadir, 'CreatedData/', 'SecondHandSmkModelData.Rdata'))
save(child_nosmoke, file=paste0(datadir, 'CreatedData/', 'ChildNoSmkModelData.Rdata'))
save(teen_nosmoke, file=paste0(datadir, 'CreatedData/', 'TeenNoSmkModelData.Rdata'))
save(twovisit_nosmoke, file=paste0(datadir, 'CreatedData/', 'TwoVisitNoSmkModelData.Rdata'))
save(fathersmkdata, file=paste0(datadir, 'CreatedData/', 'FatherSmkModelData.Rdata'))
save(list=c("basemodeldata", "prenatalexdata", "secondhandsmkdata", "child_secondhand", "teen_secondhand", "twovisit_secondhand", "twovisit_nosmoke", "child_nosmoke", "teen_nosmoke", "fathersmkdata"), file=paste0(datadir, 'CreatedData/', Sys.Date(), 'CreatedData.Rdata'))